## Daniel J. Mallinson and Patrick Burns
## Replication code for Increasing Career Confidence Through a Course in Public Service Careers
## 2017-11-06

library(foreign)

raw.data <- read.csv("spring_2017_data.csv")
data <- raw.data

#Drop observation with no post-values
data <- data[!is.na(data$post_cmi_total),]

## High-level results

t.test(data$post_cmi_total,data$cmi_total, paired=TRUE)
sd(data$cmi_total)
sd(data$post_cmi_total)
var(data$cmi_total)
var(data$post_cmi_total)
## Mean pre: 12.53
## Mean post: 18.71
## t = 2.844
## p = 0.014
## Standard deviation pre: 6.96
## Standard deviation post: 4.66
## Variance pre: 48.44
## Variance post: 21.76


t.test(data$post_cdsessf_total, data$cdsessf_total, paired=TRUE)
sd(data$cdsessf_total)
sd(data$post_cdsessf_total)
var(data$cdsessf_total)
var(data$post_cdsessf_total)
## Mean pre: 76.93
## Mean post: 86.79
## t = 4.77
## p = 0.0004
## Standard deviation pre: 8.81
## Standard deviation post: 8.76
## Variance pre: 77.54
## Variance post: 76.80

# Plot overall changes
# https://stats.stackexchange.com/questions/190223/how-to-visualize-independent-two-sample-t-test

pdf("overall_box_plots.pdf", height=4, width=8)
par(mfrow=c(1,2))
boxplot(data$cmi_total, data$post_cmi_total, col=c("gray", "gray"), names=c("Pre-Course", "Post-Course"), ylim=c(0,30), xlab = "(a) Career Maturity (CMI-R)")
#, ylab="Career Maturity Inventory - Revised"
boxplot(data$cdsessf_total, data$post_cdsessf_total, col=c("gray", "gray"), names=c("Pre-Course", "Post-Course"), ylim=c(60,100), xlab="(b) Career Self-Efficacy (CDSES-SF)")
#, ylab="Career Decision Self-Efficacy Scale (SF)"
dev.off()

pdf("overall_bar_plots.pdf", height=6, width=8)
par(mfrow=c(1,2))
m1 <- c(mean(data$cmi_total), mean(data$post_cmi_total))
names(m1) <- c("Pre-Course", "Post-Course")
se1 <- c(sd(data$cmi_total)/sqrt(length(data$cmi_total)), sd(data$post_cmi_total)/sqrt(length(data$post_cmi_total)))
bp <- barplot(m1, ylim=c(0,20), xlab="(a) Career Maturity (CMI-R)")
arrows(x0=bp, y0=m1-se1, y1=m1+se1, code=3, angle=90)

m2 <- c(mean(data$cdsessf_total), mean(data$post_cdsessf_total))
names(m2) <- c("Pre-Course", "Post-Course")
se2 <- c(sd(data$cdsessf_total)/sqrt(length(data$post_cdsessf_total)), sd(data$cdsessf_total)/sqrt(length(data$post_cdsessf_total)))
bp <- barplot(m2, ylim=c(0,100), xlab="(b) Career Self-Efficacy (CDSES-SF)")
arrows(x0=bp, y0=m2-se2, y1=m2+se2, code=3, angle=90)
dev.off()

## Look at change in Career Decision Question 26
t.test(data$post_cdsessf26, data$cdsessf26, praired=TRUE)
mean(data$cdsessf26, na.rm=TRUE)
mean(data$post_cdsessf26, na.rm=TRUE)
#Mean pre: 3.1
#Standard Deviation pre: 0.70
#Mean post: 3.8
#t = 3.3538
#p = 0.003

## Examine Correlations between difference scores and control variables
data$cmidif <- data$post_cmi_total - data$cmi_total
data$cdsessfdif <- data$post_cdsessf_total - data$cdsessf_total

#Correlation with class attendence
library(Hmisc)
rcorr(cbind(data$cmidif, data$prop_attendance), type="pearson")
rcorr(cbind(data$cdsessfdif, data$prop_attendance), type="pearson")

#Correlation with completed credits
rcorr(cbind(data$cmidif, data$credits), type="pearson")
rcorr(cbind(data$cdsessfdif, data$credits), type="pearson")

#Correlation with age
rcorr(cbind(data$cmidif, data$age), type="pearson")
rcorr(cbind(data$cdsessfdif, data$age), type="pearson")

#Correlation between class attendence and credits
rcorr(cbind(data$prop_attendance, data$credits), type="pearson")